static unsigned int get_domnr(void)
{
-#if 1
static unsigned int domnr = 0;
return ++domnr;
-#else
- struct task_struct *p = &idle0_task;
- unsigned long dom_mask = 0;
- read_lock_irq(&tasklist_lock);
- do {
- if ( is_idle_task(p) ) continue;
- set_bit(p->domain, &dom_mask);
- }
- while ( (p = p->next_task) != &idle0_task );
- read_unlock_irq(&tasklist_lock);
- return (dom_mask == ~0UL) ? 0 : ffz(dom_mask);
-#endif
}
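+
+/*
+ * NB: domain IDs from this counter increase monotonically and are never
+ * reused; the old (disabled) scheme scanned the task list with ffz() to
+ * recycle the lowest free ID.
+ */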
static void build_page_list(struct task_struct *p)
sched_add_domain(p);
+ INIT_LIST_HEAD(&p->net_vifs);
+
p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
INIT_LIST_HEAD(&p->pg_head);
p->max_pages = p->tot_pages = 0;
/* Kill the currently executing domain. */
void kill_domain(void)
{
+ struct list_head *ent;
+ net_vif_t *vif;
+
if ( current->domain == 0 )
{
extern void machine_restart(char *);
}
printk("Killing domain %d\n", current->domain);
-
+
sched_rem_domain(current);
+
+ unlink_blkdev_info(current);
+
+ while ( (ent = current->net_vifs.next) != &current->net_vifs )
+ {
+ vif = list_entry(ent, net_vif_t, dom_list);
+ unlink_net_vif(vif);
+ }
+
schedule();
BUG(); /* never get here */
}
list_del(&pf->list);
list_add_tail(&pf->list, &p->pg_head);
+ ASSERT(free_pfns != 0);
free_pfns--;
}
spin_unlock_irqrestore(&free_list_lock, flags);
p->tot_pages = req_pages;
- // temporary, max_pages should be explicitly specified
+ /* TEMPORARY: max_pages should be explicitly specified. */
p->max_pages = p->tot_pages;
return 0;
void free_all_dom_mem(struct task_struct *p)
{
- struct list_head *list_ent, *tmp;
+ struct list_head *ent;
+ unsigned long flags;
- list_for_each_safe(list_ent, tmp, &p->pg_head)
+ spin_lock_irqsave(&free_list_lock, flags);
+ while ( (ent = p->pg_head.next) != &p->pg_head )
{
- struct pfn_info *pf = list_entry(list_ent, struct pfn_info, list);
+ struct pfn_info *pf = list_entry(ent, struct pfn_info, list);
pf->type_count = pf->tot_count = pf->flags = 0;
- list_del(list_ent);
- list_add(list_ent, &free_list);
+ ASSERT(ent->next->prev == ent);
+ ASSERT(ent->prev->next == ent);
+ list_del(ent);
+ list_add(ent, &free_list);
+ free_pfns++;
}
+ spin_unlock_irqrestore(&free_list_lock, flags);
p->tot_pages = 0;
}
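+
+/*
+ * Holding free_list_lock across the whole sweep, and bumping free_pfns as
+ * each page goes back, keeps the free list and its counter consistent
+ * against concurrent allocators; the old list_for_each_safe() walk did
+ * neither.
+ */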
{
ASSERT(p->state == TASK_DYING);
ASSERT(!p->has_cpu);
+
+ printk("Releasing task %d\n", p->domain);
+
write_lock_irq(&tasklist_lock);
REMOVE_LINKS(p);
write_unlock_irq(&tasklist_lock);
- /* XXX SMH: so below is screwed currently; need ref counting on vifs,
- vhds, etc and proper clean up. Until then just blow the memory :-( */
-#if 0
/*
- * Safe! Only queue skbuffs with tasklist_lock held.
- * Only access shared_info with tasklist_lock held.
- * And free_task_struct() only releases if refcnt == 0.
+ * This frees up blkdev rings. Totally safe since blkdev ref counting
+ * actually uses the task_struct refcnt.
*/
- while ( p->num_net_vifs )
- {
- destroy_net_vif(p);
- }
-
- free_page((unsigned long)p->mm.perdomain_pt);
-
destroy_blkdev_info(p);
+ /* Free all memory associated with this domain. */
+ free_page((unsigned long)p->mm.perdomain_pt);
UNSHARE_PFN(virt_to_page(p->shared_info));
free_page((unsigned long)p->shared_info);
-
free_all_dom_mem(p);
-
- free_task_struct(p);
-#else
- printk("XEN::release_task: not freeing memory etc yet XXX FIXME.\n");
-#endif
+ free_pages((unsigned long)p, 1);
}
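+
+/*
+ * The closing free_pages(..., 1) mirrors the order-1 allocation in
+ * alloc_task_struct(). We only get here from free_task_struct() once the
+ * refcnt has hit zero, so nobody else can still hold a pointer to p.
+ */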
kmem_cache_t *net_vif_cache;
kmem_cache_t *net_rule_cache;
static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED; /* rule mutex */
-static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
+
+rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
void print_net_rule_list();
net_ring_t *new_ring;
net_shadow_ring_t *shadow_ring;
struct task_struct *dom_task;
-
+ unsigned long flags;
+
if ( !(dom_task = find_domain_by_id(domain)) )
return NULL;
shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
- /* Fill in the new vif struct. */
-
- new_vif->net_ring = new_ring;
+ /*
+ * Fill in the new vif struct. Note that, while the vif's refcnt is
+ * non-zero, we hold a reference to the task structure.
+ */
+ atomic_set(&new_vif->refcnt, 1);
+ new_vif->net_ring = new_ring;
new_vif->shadow_ring = shadow_ring;
-
- new_vif->domain = dom_task;
+ new_vif->domain = dom_task;
+ new_vif->list.next = NULL;
- new_vif->list.next = NULL;
-
- write_lock(&sys_vif_lock);
+ list_add(&new_vif->dom_list, &dom_task->net_vifs);
+ dom_task->num_net_vifs++;
+
+ write_lock_irqsave(&sys_vif_lock, flags);
new_vif->id = sys_vif_count;
sys_vif_list[sys_vif_count++] = new_vif;
- write_unlock(&sys_vif_lock);
-
- dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
- dom_task->num_net_vifs++;
+ write_unlock_irqrestore(&sys_vif_lock, flags);
- free_task_struct(dom_task);
return new_vif;
fail:
return NULL;
}
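+
+/*
+ * Lifetime sketch for the rule stated above: the task reference taken by
+ * find_domain_by_id() is effectively donated to the vif, and is only
+ * dropped by the free_task_struct() at the end of destroy_net_vif().
+ * Transient users therefore pin the vif, not the task:
+ *
+ *     get_vif(vif);   // implicitly keeps vif->domain alive too
+ *     ... use vif->domain ...
+ *     put_vif(vif);   // may invoke destroy_net_vif()
+ */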
-/* delete_net_vif - Delete the last vif in the given domain.
- *
- * There doesn't seem to be any reason (yet) to be able to axe an arbitrary
- * vif, by vif id.
- */
-
-void destroy_net_vif(struct task_struct *p)
+void destroy_net_vif(net_vif_t *vif)
{
int i;
+ unsigned long *pte, flags;
+ struct pfn_info *page;
+ struct task_struct *p = vif->domain;
+
+ /* Return any outstanding receive buffers to the guest OS. */
+ spin_lock_irqsave(&p->page_lock, flags);
+ for ( i = vif->shadow_ring->rx_idx;
+ i != vif->shadow_ring->rx_prod;
+ i = ((i+1) & (RX_RING_SIZE-1)) )
+ {
+ rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
+ if ( rx->status != RING_STATUS_OK ) continue;
+ pte = map_domain_mem(rx->addr);
+ *pte |= _PAGE_PRESENT;
+ page = frame_table + (*pte >> PAGE_SHIFT);
+ page->flags &= ~PG_type_mask;
+ if ( (*pte & _PAGE_RW) )
+ page->flags |= PGT_writeable_page | PG_need_flush;
+ unmap_domain_mem(pte);
+ }
+ spin_unlock_irqrestore(&p->page_lock, flags);
- if ( p->num_net_vifs <= 0 ) return; // nothing to do.
-
- i = --p->num_net_vifs;
-
- write_lock(&sys_vif_lock);
- sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
- write_unlock(&sys_vif_lock);
-
- kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
- kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
- kfree(p->net_vif_list[i]->shadow_ring);
- kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
+ kfree(vif->shadow_ring->tx_ring);
+ kfree(vif->shadow_ring->rx_ring);
+ kfree(vif->shadow_ring);
+ kmem_cache_free(net_vif_cache, vif);
+ free_task_struct(p);
}
+void unlink_net_vif(net_vif_t *vif)
+{
+ unsigned long flags;
+ list_del(&vif->dom_list);
+ vif->domain->num_net_vifs--;
+ write_lock_irqsave(&sys_vif_lock, flags);
+ sys_vif_list[vif->id] = NULL;
+ write_unlock_irqrestore(&sys_vif_lock, flags);
+ put_vif(vif);
+}
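+/*
+ * Note the unlink/destroy split: unlink_net_vif() only detaches the vif
+ * from the domain and system lists and drops the lists' reference;
+ * destroy_net_vif() runs whenever the final put_vif() fires, which may be
+ * later if the rx path or an in-flight transmit skb still holds a ref.
+ */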
+
+
/* vif_query - Call from the proc file system to get a list of vifs
* assigned to a particular domain.
*/
void vif_query(vif_query_t *vq)
{
- struct task_struct *dom_task;
+ net_vif_t *vif;
+ struct task_struct *p;
+ unsigned long flags;
char buf[128];
int i;
- if ( !(dom_task = find_domain_by_id(vq->domain)) ) return;
+ if ( !(p = find_domain_by_id(vq->domain)) )
+ return;
*buf = '\0';
- for ( i = 0; i < dom_task->num_net_vifs; i++ )
- sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id);
+ read_lock_irqsave(&sys_vif_lock, flags);
+ for ( i = 0; i < MAX_SYSTEM_VIFS; i++ )
+ {
+ vif = sys_vif_list[i];
+ if ( (vif == NULL) || (vif->domain != p) ) continue;
+ sprintf(buf + strlen(buf), "%d\n", vif->id);
+ }
+ read_unlock_irqrestore(&sys_vif_lock, flags);
copy_to_user(vq->buf, buf, strlen(buf) + 1);
- free_task_struct(dom_task);
+ free_task_struct(p);
}
-
-/* print_vif_list - Print the contents of the global vif table.
- */
-
-void print_vif_list()
-{
- int i;
- net_vif_t *v;
-
- printk("Currently, there are %d VIFs.\n", sys_vif_count);
- for ( i = 0; i<sys_vif_count; i++ )
- {
- v = sys_vif_list[i];
- printk("] VIF Entry %d(%d):\n", i, v->id);
- printk(" > net_ring*: %p\n", v->net_ring);
- printk(" > domain : %u\n", v->domain->domain);
- }
-}
-
/* ----[ Net Rule Functions ]-----------------------------------------------*/
/* add_net_rule - Add a new network filter rule.
prev = schedule_data[this_cpu].prev;
prev->policy &= ~SCHED_YIELD;
- if ( prev->state == TASK_DYING ) release_task(prev);
+ if ( prev->state == TASK_DYING )
+ free_task_struct(prev);
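+
+ /*
+ * The scheduler now merely drops its reference to a dying domain;
+ * release_task() runs from the free_task_struct() macro once the
+ * very last reference, wherever it is held, goes away.
+ */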
same_process:
/* update the domains notion of time */
{
list_del(&p->blkdev_list);
p->blkdev_list.next = NULL;
+ free_task_struct(p);
}
spin_unlock_irqrestore(&io_schedule_list_lock, flags);
}
if ( !__on_blkdev_list(p) )
{
list_add_tail(&p->blkdev_list, &io_schedule_list);
+ get_task_struct(p);
}
spin_unlock_irqrestore(&io_schedule_list_lock, flags);
}
{
ent = io_schedule_list.next;
p = list_entry(ent, struct task_struct, blkdev_list);
+ get_task_struct(p);
remove_from_blkdev_list(p);
if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
add_to_blkdev_list_tail(p);
+ free_task_struct(p);
}
/* Push the batch through to disc. */
{
make_response(pending_req->domain, pending_req->id,
pending_req->operation, pending_req->status);
+ free_task_struct(pending_req->domain);
spin_lock_irqsave(&pend_prod_lock, flags);
pending_ring[pending_prod] = pending_req - pending_reqs;
PENDREQ_IDX_INC(pending_prod);
pending_req->status = 0;
atomic_set(&pending_req->pendcnt, nr_psegs);
+ get_task_struct(p);
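+ /* This reference pins the domain while its requests are pending; it
+ is dropped in the completion path, beside make_response(). */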
+
/* Now we pass each segment down to the real blkdev layer. */
for ( i = 0; i < nr_psegs; i++ )
{
xen_refresh_segment_list(p);
}
-/* End-of-day teardown for a domain. XXX Outstanding requests? */
+/* End-of-day teardown for a domain. */
void destroy_blkdev_info(struct task_struct *p)
{
- remove_from_blkdev_list(p);
+ ASSERT(!__on_blkdev_list(p));
UNSHARE_PFN(virt_to_page(p->blk_ring_base));
free_page((unsigned long)p->blk_ring_base);
}
+void unlink_blkdev_info(struct task_struct *p)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_schedule_list_lock, flags);
+ if ( __on_blkdev_list(p) )
+ {
+ list_del(&p->blkdev_list);
+ p->blkdev_list.next = (void *)0xdeadbeef; /* prevent reinsertion */
+ free_task_struct(p);
+ }
+ spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
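+/*
+ * 0xdeadbeef rather than NULL is deliberate: __on_blkdev_list() presumably
+ * keys off blkdev_list.next != NULL, so a NULL here would let the dying
+ * domain be re-queued by add_to_blkdev_list_tail(); the poison value keeps
+ * it permanently "on list" instead.
+ */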
+
void initialize_block_io ()
{
int i;
#define alloc_task_struct() \
((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
#define free_task_struct(_p) \
- if ( atomic_dec_and_test(&(_p)->refcnt) ) free_pages((unsigned long)(_p), 1)
+ do { if ( atomic_dec_and_test(&(_p)->refcnt) ) release_task(_p); } while (0)
#define get_task_struct(_p) \
atomic_inc(&(_p)->refcnt)
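+
+/*
+ * Minimal sketch of the intended pairing (assuming the initial reference
+ * came from find_domain_by_id() or a prior get_task_struct()):
+ *
+ *     get_task_struct(p);     // pin p across a sleep or I/O
+ *     ...
+ *     free_task_struct(p);    // last ref ==> release_task(p)
+ */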
extern kdev_t xendev_to_physdev(unsigned short xendev);
extern void init_blkdev_info(struct task_struct *);
+extern void unlink_blkdev_info(struct task_struct *);
extern void destroy_blkdev_info(struct task_struct *);
extern int unregister_blkdev(unsigned int, const char *);
#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H
+#include <xeno/lib.h>
+
/*
* Simple doubly linked list implementation.
*
*/
static __inline__ void list_del(struct list_head *entry)
{
+ ASSERT(entry->next->prev == entry);
+ ASSERT(entry->prev->next == entry);
__list_del(entry->prev, entry->next);
}
long warped; /* time it ran warped last time */
long uwarped; /* time it ran unwarped last time */
-
/* Network I/O */
net_ring_t *net_ring_base;
- net_vif_t *net_vif_list[MAX_GUEST_VIFS];
+ struct list_head net_vifs;
int num_net_vifs;
/* Block I/O */
net_shadow_ring_t *shadow_ring;
int id;
struct task_struct *domain;
- struct list_head list;
+ struct list_head list; /* scheduling list */
+ struct list_head dom_list; /* domain list */
+ atomic_t refcnt;
} net_vif_t;
+#define get_vif(_v) (atomic_inc(&(_v)->refcnt))
+#define put_vif(_v) \
+do { \
+ if ( atomic_dec_and_test(&(_v)->refcnt) ) destroy_net_vif(_v); \
+} while (0)
+
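+/*
+ * Typical transient-use pattern, as in the rx path elsewhere in this
+ * change: look up and pin under sys_vif_lock, then work lock-free.
+ *
+ *     read_lock_irqsave(&sys_vif_lock, flags);
+ *     vif = sys_vif_list[idx];
+ *     if ( vif != NULL ) get_vif(vif);
+ *     read_unlock_irqrestore(&sys_vif_lock, flags);
+ *     ...
+ *     put_vif(vif);   // may tear the vif down if it was unlinked
+ */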
/* VIF-related defines. */
#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct
#define MAX_SYSTEM_VIFS 256
/* vif globals */
extern int sys_vif_count;
extern net_vif_t *sys_vif_list[];
+extern rwlock_t sys_vif_lock; /* protects the sys_vif_list */
/* vif prototypes */
net_vif_t *create_net_vif(int domain);
-void destroy_net_vif(struct task_struct *p);
+void destroy_net_vif(net_vif_t *vif);
+void unlink_net_vif(net_vif_t *vif);
void add_default_net_rule(int vif_id, u32 ipaddr);
int __net_get_target_vif(u8 *data, unsigned int len, int src_vif);
void add_default_net_rule(int vif_id, u32 ipaddr);
if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
return;
- if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK )
+ rx = shadow_ring->rx_ring + i;
+
+ if ( rx->status != RING_STATUS_OK )
{
DPRINTK("Bad buffer in deliver_packet()\n");
goto inc_and_out;
}
- rx = shadow_ring->rx_ring + i;
ASSERT(skb->len <= PAGE_SIZE);
rx->size = skb->len;
rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
h_pfn = skb->pf;
- h_pfn->tot_count = h_pfn->type_count = 1;
- g_pfn->tot_count = g_pfn->type_count = 0;
+ h_pfn->tot_count = 1;
+ g_pfn->tot_count = g_pfn->type_count = h_pfn->type_count = 0;
h_pfn->flags = g_pfn->flags & ~PG_type_mask;
-
- if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page | PG_need_flush;
g_pfn->flags = 0;
+ if ( (*g_pte & _PAGE_RW) )
+ {
+ h_pfn->flags |= PGT_writeable_page | PG_need_flush;
+ h_pfn->type_count = 1;
+ }
+
/* Point the guest at the new machine frame. */
machine_to_phys_mapping[h_pfn - frame_table]
= machine_to_phys_mapping[g_pfn - frame_table];
unmap_domain_mem(g_pte);
+ list_del(&g_pfn->list);
+ list_add(&h_pfn->list, &vif->domain->pg_head);
+
spin_unlock_irqrestore(&vif->domain->page_lock, flags);
/* Our skbuff now points at the guest's old frame. */
if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
skb->dst_vif = __net_get_target_vif(skb->data, skb->len, skb->src_vif);
- if ( ((vif = sys_vif_list[skb->dst_vif]) == NULL) ||
- (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) )
+ read_lock_irqsave(&sys_vif_lock, flags);
+ if ( (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) ||
+ ((vif = sys_vif_list[skb->dst_vif]) == NULL) )
{
+ read_unlock_irqrestore(&sys_vif_lock, flags);
netdev_rx_stat[this_cpu].dropped++;
unmap_domain_mem(skb->head);
kfree_skb(skb);
return NET_RX_DROP;
}
+ get_vif(vif);
+ read_unlock_irqrestore(&sys_vif_lock, flags);
+
deliver_packet(skb, vif);
cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET_RX);
+ put_vif(vif);
+
unmap_domain_mem(skb->head);
kfree_skb(skb);
hyp_event_notify(cpu_mask);
static void remove_from_net_schedule_list(net_vif_t *vif)
{
unsigned long flags;
- if ( !__on_net_schedule_list(vif) ) return;
spin_lock_irqsave(&net_schedule_list_lock, flags);
- if ( __on_net_schedule_list(vif) )
- {
- list_del(&vif->list);
- vif->list.next = NULL;
- }
+ ASSERT(__on_net_schedule_list(vif));
+ list_del(&vif->list);
+ vif->list.next = NULL;
+ put_vif(vif);
spin_unlock_irqrestore(&net_schedule_list_lock, flags);
}
if ( !__on_net_schedule_list(vif) )
{
list_add_tail(&vif->list, &net_schedule_list);
+ get_vif(vif);
}
spin_unlock_irqrestore(&net_schedule_list_lock, flags);
}
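+
+/*
+ * The schedule list owns one reference per queued vif: taken by the
+ * get_vif() above, dropped by the put_vif() in
+ * remove_from_net_schedule_list().
+ */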
cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
guest_event_notify(cpu_mask);
}
+
+ put_vif(vif);
}
/* Get a vif from the list with work to do. */
ent = net_schedule_list.next;
vif = list_entry(ent, net_vif_t, list);
+ get_vif(vif); /* our own reference; remove_from_net_schedule_list() drops the list's */
remove_from_net_schedule_list(vif);
if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
+ {
+ put_vif(vif);
continue;
+ }
/* Pick an entry from the transmit queue. */
tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
add_to_net_schedule_list_tail(vif);
/* Check the chosen entry is good. */
- if ( tx->status != RING_STATUS_OK ) continue;
+ if ( tx->status != RING_STATUS_OK )
+ {
+ put_vif(vif);
+ continue;
+ }
if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
{
printk("Out of memory in net_tx_action()!\n");
tx->status = RING_STATUS_BAD_PAGE;
+ put_vif(vif);
break;
}
shared_info_t *s = current->shared_info;
net_ring_t *net_ring;
net_shadow_ring_t *shadow_ring;
- unsigned int nvif;
-
+ net_vif_t *vif;
+ struct list_head *ent;
+
clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
- for ( nvif = 0; nvif < current->num_net_vifs; nvif++ )
+ list_for_each(ent, &current->net_vifs)
{
- net_ring = current->net_vif_list[nvif]->net_ring;
- shadow_ring = current->net_vif_list[nvif]->shadow_ring;
+ vif = list_entry(ent, net_vif_t, dom_list);
+ net_ring = vif->net_ring;
+ shadow_ring = vif->shadow_ring;
/* This would mean that the guest OS has fiddled with our index. */
if ( shadow_ring->rx_cons != net_ring->rx_cons )
long do_net_update(void)
{
+ struct list_head *ent;
net_ring_t *net_ring;
net_shadow_ring_t *shadow_ring;
net_vif_t *current_vif;
- unsigned int i, j;
+ unsigned int i;
struct sk_buff *skb;
tx_entry_t tx;
rx_shadow_entry_t *rx;
unsigned long pfn;
struct pfn_info *page;
unsigned long *g_pte;
+ int target;
+ u8 *g_data;
+ unsigned short protocol;
- for ( j = 0; j < current->num_net_vifs; j++)
+ list_for_each(ent, &current->net_vifs)
{
- int target;
- u8 *g_data;
- unsigned short protocol;
-
- current_vif = current->net_vif_list[j];
- net_ring = current_vif->net_ring;
+ current_vif = list_entry(ent, net_vif_t, dom_list);
+ net_ring = current_vif->net_ring;
shadow_ring = current_vif->shadow_ring;
/*
goto tx_unmap_and_continue;
skb->destructor = tx_skb_release;
+ get_vif(current_vif); /* reference travels with the skb; dropped by its destructor */
shadow_ring->tx_ring[i].status = RING_STATUS_OK;
if ( (pfn >= max_page) ||
(page->flags != (PGT_l1_page_table | current->domain)) )
{
- DPRINTK("Bad page frame containing ppte\n");
+ DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
+ current->domain, pfn, max_page, page->flags);
spin_unlock_irq(&current->page_lock);
continue;
}
g_pte = map_domain_mem(rx->addr);
- if (!(*g_pte & _PAGE_PRESENT))
+ if ( !(*g_pte & _PAGE_PRESENT) )
{
DPRINTK("Inavlid PTE passed down (not present)\n");
goto rx_unmap_and_continue;
page = (*g_pte >> PAGE_SHIFT) + frame_table;
- if (page->tot_count != 1)
+ if ( page->tot_count != 1 )
{
DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
page->type_count, page->tot_count, page->flags);
list_ptr = free_list.next;
pf = list_entry(list_ptr, struct pfn_info, list);
- pf->flags = 0; /* owned by dom0 */
+ pf->flags = 0;
list_del(&pf->list);
free_pfns--;
spin_lock_irqsave(&free_list_lock, flags);
+ pf->flags = pf->type_count = pf->tot_count = 0;
list_add(&pf->list, &free_list);
free_pfns++;